Abstract
Abstract mRNA vaccines are likely to become widely used for the prevention of infectious diseases in the future. Nevertheless, a notable gap exists in mechanistic data, particularly concerning the potential effects of sequential mRNA immunization or pre-existing immunity on the early innate immune response triggered by vaccination. In this study healthy adults, with or without documented prior SARS-CoV-2 infection, were vaccinated with the BNT162b2/Comirnaty mRNA vaccine. Prior infection conferred significantly stronger induction of proinflammatory and type I interferon related gene signatures, serum cytokines, and monocyte expansion after the prime vaccination. The response to the second vaccination further increased the magnitude of the early innate response in both study groups. The third vaccination did not additionally augment vaccine-induced inflammation. In vitro stimulation of PBMCs with toll-like receptor ligands showed no difference in cytokine responses between groups or pre/post prime vaccination, indicating absence of a trained immunity effect. We observed that levels of pre-existing antigen-specific CD4 T cells, antibody and memory B cells correlated with elements of the early innate response to the first vaccination. Our data thereby indicate that pre-existing memory formed by infection can augment the innate immune activation induced by mRNA vaccines.library(tidyr)
library(ggpubr)
library(ggsci)
library(ComplexHeatmap)
library(GeneOverlap)
library(data.table)
library(kableExtra)
library(forcats)
library(mgsub)
library(DESeq2)
library(pheatmap)
library(ggplot2)
library(ggrepel)
library(RColorBrewer)
library(limma)
library(edgeR)
library(enrichR)
library(gridExtra)
library(stringr)
library(dplyr)
library(ensembldb)
library(EnsDb.Hsapiens.v86)
library("org.Hs.eg.db", character.only = TRUE)
library(factoextra)
library(rafalib)
library(tibble)
library(devtools)
library(clusterProfiler)
library(enrichplot)
library(ggprism)
library(msigdbr)
# Define base directory
data_dir <- "../data/"
# Define gene counts directory
count_dir <- file.path(data_dir, "processed_data_nfcore/batch_1-2/salmon/")
## Gene counts file
count_file <- file.path(count_dir, "salmon.merged.gene_counts.tsv")
# Define metadata directory
sample_metadata_dir <- file.path(data_dir, "metadata/")
## Gene metadata file
gene_metadata_file <- file.path(sample_metadata_dir, "2023-11-222_mart_export_gene_ids_hsa.txt")
## Sample metadata file
sample_metadata_file <- file.path(sample_metadata_dir, "sample_metadata_1.csv")
## Patient metadata file
patient_metadata_file <- file.path(sample_metadata_dir, "patient_metadata.csv")
Wrangle raw counts.
# Load raw counts
counts_raw <- data.table::fread(count_file,
header = TRUE, sep = "\t"
) %>%
as.data.frame() %>%
select(-gene_id)
colnames(counts_raw) <- sub("_L001|_L002", "", colnames(counts_raw))
# Filter for only protein coding genes or mitochondrial/ribosomic genes
gene_id_metadata <- read.csv(gene_metadata_file) %>% filter(Gene.type %in% c("Mt_rRNA", "protein_coding", "rRNA"))
# Aggregate and sum up genes with same gene symbol (sum up isoform of genes)
counts_raw <- counts_raw[counts_raw$gene_name %in% gene_id_metadata$Gene.name, ]
counts_raw <- aggregate(counts_raw[, -1],
list(gene_name = counts_raw[, 1]),
FUN = sum
)
# Change row names to gene names
rownames(counts_raw) <- counts_raw$gene_name
counts_raw <- dplyr::select(counts_raw, -c(gene_name))
Match with raw counts.
# Load sample metadata
sampleTable <- read.table(sample_metadata_file,
sep = ",", header = TRUE
) %>%
dplyr::select(-c(age, sex))
# Load patient metadata
patient_metadata <- read.table(patient_metadata_file,
sep = ",", header = TRUE
)
# Join sample and patient metadata
sampleTable <- sampleTable %>% left_join(patient_metadata, by = "subject_ID")
# Change visits to dose and time point...
sampleTable <- sampleTable %>%
mutate(
dose = as.factor(plyr::mapvalues(visit,
from = c(1, 2, 4, 5, 10, 11, 12),
to = c("0_dose1", "24_dose1", "0_dose2", "24_dose2", "0_dose3", "24_dose3", "48_dose3")
)),
group = as.factor(group)
) %>%
# ...(sex) 0 and 1 to male and female...
mutate(
sex = as.factor(plyr::mapvalues(sex,
from = c(0, 1),
to = c("male", "female")
)),
group = as.factor(sex)
) %>%
# ...(prior covid) 0 and 1 to neg and pos
mutate(
prior_covid19 = as.factor(plyr::mapvalues(prior_covid19,
from = c(0, 1),
to = c("neg", "pos")
)),
group = as.factor(prior_covid19)
)
# Remove 24dose_2 from subject 21 (missing 0dose_2)
sampleTable <- subset(sampleTable, sample_ID != "P22761_1037_S37")
# Remove one sample from subject 7 (sample replicate)
sampleTable <- subset(sampleTable, sample_ID != "P26208_1060_S62")
# Remove corresponding samples from count table
counts_raw <- subset(counts_raw, select = -c(P26208_1060_S62, P22761_1037_S37))
# Match count data with sample table
colnames(counts_raw) <- sampleTable$sample_ID
counts_raw <- counts_raw[, pmatch(sampleTable$sample_ID, colnames(counts_raw))]
all(rownames(sampleTable$sample_ID) == colnames(counts_raw))
Plotting raw counts for first visual inspection. The boxplot shows median values of zero across all samples. In the histogram there is a huge peak of zero counts. Both indicate that the data set would benefit from a low count filtering.
# Visualize distribution of raw counts with boxplot and density plot
boxplot(log2(as.matrix(counts_raw) + 1),
ylab = expression("Log"[2] ~ "Read counts"),
las = 2,
main = "Raw data"
)
hist(log2(as.matrix(counts_raw) + 1),
ylab = "",
las = 2,
main = "Raw data"
)
Plot number of genes detected across samples. All samples are more or less close to average so we don’t need to discard any samples.
barplot(colSums(counts_raw > 3),
ylab = "Number of detected genes",
las = 2
) +
abline(h = median(colSums(counts_raw > 3)))
Removing reads with the log2 of the counts per million (cpm) lower than 1 (= 45896 genes). Plot genes detection rate across samples, comparing raw and filtered counts.
meanLog2CPM <- rowMeans(log2(cpm(counts_raw) + 1))
counts_filtered <- counts_raw[meanLog2CPM > 1, ]
hist(rowSums(counts_raw > 3), main = title("Raw Counts"))
hist(rowSums(counts_filtered > 3), main = title("Filtered Counts"))
Plot distribution of the filtered counts
boxplot(log2(as.matrix(counts_filtered) + 1),
ylab = expression("Log"[2] ~ "Read counts"),
las = 2,
main = "Filtered data"
)
hist(log2(as.matrix(counts_filtered) + 1),
ylab = "",
las = 2,
main = "Filtered data"
)
Generating three separate DESeq objects with different design parameters to allow for more comparisons. Controlling for age and sex.
# Remove vaccine/sars-cov-2 related genes
counts_filtered <- counts_filtered[!grepl("vaccine|spike|sars_cov", rownames(counts_filtered)), ]
# Prepare for DESeq by creating new variables and factorizing
sampleTable <- sampleTable %>%
mutate(
conditions = paste0(sampleTable$group, "_", sampleTable$dose),
time = gsub(".{6}$", "", conditions),
conditions = factor(conditions),
time = factor(time),
sex = factor(sex),
dose = factor(dose, levels = c("0_dose1", "24_dose1", "0_dose2", "24_dose2", "0_dose3", "24_dose3", "48_dose3")),
group = factor(group),
batch = factor(batch)
)
# Create DESeq objects with different designs; comparing against conditions, time and dose
dds1 <- DESeqDataSetFromMatrix(
countData = round(counts_filtered),
colData = as.data.frame(sampleTable),
design = ~ sex + age + batch + conditions
)
dds2 <- DESeqDataSetFromMatrix(
countData = round(counts_filtered),
colData = as.data.frame(sampleTable),
design = ~ sex + age + batch + time
)
dds3 <- DESeqDataSetFromMatrix(
countData = round(counts_filtered),
colData = as.data.frame(sampleTable),
design = ~ sex + age + batch + dose
)
dds4 <- DESeqDataSetFromMatrix(
countData = round(counts_filtered),
colData = as.data.frame(sampleTable),
design = ~ sex + age + batch + dose + group + dose:group
)
Normalize with variance stabilizing transformation Plot distribution of data after normalization
# Normalize with variance stabilizing transformation for later PCA and heatmap
counts_vst_normalized <- vst(dds1, blind = TRUE)
vst_matrix <- assay(counts_vst_normalized)
hist(vst_matrix)
boxplot(vst_matrix,
ylab = expression("Log"[2] ~ "Read counts"),
las = 2,
main = "VST"
)
Hierarchical clustering for initial inspection of sample similarity and gene expression patterns.
# Sample heatmap
sampleDist <- cor(vst_matrix, method = "spearman")
Metadata <- data.frame(sampleTable$group, sampleTable$dose)
names(Metadata) <- c("Group", "Dose")
rownames(Metadata) <- sampleTable$sample_ID
# Plot heatmap
colors <- colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(255)
Heatmap <- pheatmap(sampleDist,
color = colors,
clustering_distance_rows = as.dist(1 - sampleDist),
clustering_distance_cols = as.dist(1 - sampleDist),
show_rownames = FALSE,
show_colnames = FALSE,
clustering_method = "ward.D2",
annotation_col = Metadata
)
print(Heatmap)
Dimensionality reduction for evaluating outliers and global sample clusters for quality control and primary exploratory analysis.
# PCA plot
pcaRes <- prcomp(t(assay(counts_vst_normalized)))
varExp <- round(summary(pcaRes)[[1]], digits = 2)
pcaDF <- data.frame(
PC1 = pcaRes$x[, 1],
PC2 = pcaRes$x[, 2],
Group = sampleTable$group,
Sample = sampleTable$sample_ID,
Dose = sampleTable$dose
)
pcaPlot <- ggplot(pcaDF, mapping = aes(x = PC1, y = PC2, color = Group, label = Sample)) +
geom_point(aes(shape = Group), size = 3) +
labs(
x = paste0("PC1 (", varExp[1], " %)"),
y = paste0("PC2 (", varExp[2], " %)")
) +
theme(
axis.text = element_text(size = 12),
legend.text = element_text(size = 10)
) +
scale_color_manual(values = brewer.pal(3, "Accent")) +
cowplot::theme_cowplot()
print(pcaPlot)
# Scree plot, showing contribution of principal components in descending order
pca.var <- pcaRes$sdev^2
pca.var.per <- round(pca.var / sum(pca.var) * 100, 1)
barplot(pca.var.per,
main = "Scree Plot",
xlab = "Principal Component",
ylab = "Percent Variation"
)
# Explore which variables contribute the most for each PC
var <- get_pca_var(pcaRes)
var$contrib %>%
as.data.frame() %>%
arrange(desc(Dim.1)) %>%
head() %>%
kableExtra::kable() %>%
kable_styling(bootstrap_options = "striped", full_width = FALSE)
| Dim.1 | Dim.2 | Dim.3 | Dim.4 | Dim.5 | Dim.6 | Dim.7 | Dim.8 | Dim.9 | Dim.10 | Dim.11 | Dim.12 | Dim.13 | Dim.14 | Dim.15 | Dim.16 | Dim.17 | Dim.18 | Dim.19 | Dim.20 | Dim.21 | Dim.22 | Dim.23 | Dim.24 | Dim.25 | Dim.26 | Dim.27 | Dim.28 | Dim.29 | Dim.30 | Dim.31 | Dim.32 | Dim.33 | Dim.34 | Dim.35 | Dim.36 | Dim.37 | Dim.38 | Dim.39 | Dim.40 | Dim.41 | Dim.42 | Dim.43 | Dim.44 | Dim.45 | Dim.46 | Dim.47 | Dim.48 | Dim.49 | Dim.50 | Dim.51 | Dim.52 | Dim.53 | Dim.54 | Dim.55 | Dim.56 | Dim.57 | Dim.58 | Dim.59 | Dim.60 | Dim.61 | Dim.62 | Dim.63 | Dim.64 | Dim.65 | Dim.66 | Dim.67 | Dim.68 | Dim.69 | Dim.70 | Dim.71 | Dim.72 | Dim.73 | Dim.74 | Dim.75 | Dim.76 | Dim.77 | Dim.78 | Dim.79 | Dim.80 | Dim.81 | Dim.82 | Dim.83 | Dim.84 | Dim.85 | Dim.86 | Dim.87 | Dim.88 | Dim.89 | Dim.90 | Dim.91 | Dim.92 | Dim.93 | Dim.94 | Dim.95 | Dim.96 | Dim.97 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ANKRD22 | 0.3907566 | 0.3707759 | 0.1763453 | 0.0697718 | 0.0223635 | 0.0447039 | 0.0114715 | 0.0781899 | 0.0020209 | 0.0021541 | 0.0015474 | 0.0081783 | 0.0088655 | 0.0381066 | 0.0000211 | 0.0425199 | 0.0027675 | 0.0080813 | 0.0002822 | 0.0596314 | 0.0141149 | 0.0612235 | 0.0000446 | 0.0468783 | 0.0127597 | 0.0627942 | 0.0002991 | 0.0767245 | 0.0836628 | 0.1805330 | 0.0295422 | 0.0209039 | 0.0430086 | 0.0242306 | 0.0472737 | 0.0096422 | 0.0526057 | 0.0641638 | 0.0016640 | 0.0024777 | 0.0088926 | 0.0301645 | 0.0416714 | 0.0412024 | 0.0189615 | 0.1289343 | 0.0132374 | 0.0025781 | 0.0326548 | 0.0011627 | 0.0377771 | 0.0854475 | 0.0100276 | 0.0036267 | 0.0145649 | 0.0882436 | 0.0000000 | 0.0568559 | 0.0114556 | 0.0219706 | 0.0291159 | 0.0022544 | 0.0357510 | 0.0288985 | 0.0010657 | 0.0129628 | 0.0032932 | 0.0031827 | 0.0101765 | 0.0117289 | 0.0204692 | 0.0416297 | 0.0144796 | 0.0990382 | 0.0173874 | 0.0007648 | 0.0102040 | 0.0397444 | 0.0380011 | 0.0002510 | 0.0067175 | 0.0000023 | 0.0088305 | 0.0259774 | 0.0017850 | 0.0088491 | 0.0001625 | 0.0032289 | 0.0366255 | 0.0028255 | 0.0087858 | 0.0084940 | 0.0878354 | 0.0104456 | 0.1651719 | 0.0100752 | 0.0075298 |
| CD274 | 0.3219816 | 0.1684654 | 0.0509356 | 0.0072978 | 0.0147743 | 0.0267557 | 0.0013991 | 0.0611167 | 0.0209512 | 0.0000531 | 0.0025820 | 0.0116135 | 0.0033795 | 0.0076675 | 0.0085935 | 0.0130656 | 0.0051637 | 0.0138596 | 0.0050288 | 0.0329616 | 0.0576326 | 0.0324778 | 0.0071727 | 0.0047896 | 0.0199693 | 0.0022324 | 0.0371298 | 0.0040813 | 0.0291641 | 0.0814194 | 0.0115577 | 0.0365728 | 0.0054533 | 0.0128201 | 0.0218089 | 0.0003426 | 0.0487911 | 0.0001828 | 0.1057181 | 0.0012723 | 0.0000004 | 0.0132958 | 0.0055776 | 0.0816477 | 0.0263703 | 0.0006540 | 0.0009068 | 0.0057047 | 0.0008221 | 0.0108240 | 0.0179337 | 0.0000878 | 0.0193121 | 0.0037728 | 0.0072008 | 0.0097795 | 0.0330921 | 0.0018139 | 0.0003408 | 0.0067511 | 0.0240466 | 0.0003445 | 0.0016070 | 0.0000475 | 0.0001970 | 0.0463836 | 0.0521358 | 0.0118571 | 0.0123785 | 0.0000199 | 0.0016967 | 0.0058354 | 0.0044445 | 0.0153624 | 0.0051562 | 0.0024172 | 0.0000045 | 0.0009116 | 0.0019972 | 0.0109891 | 0.0280394 | 0.0064917 | 0.0000974 | 0.0004984 | 0.0229813 | 0.0124543 | 0.0035928 | 0.0010370 | 0.0211658 | 0.0124155 | 0.0263697 | 0.0094679 | 0.0045309 | 0.0009275 | 0.0216106 | 0.0039971 | 0.0000234 |
| RSAD2 | 0.2947192 | 0.2049546 | 0.4974586 | 0.0318281 | 0.0073626 | 0.0210826 | 0.1010576 | 0.1187275 | 0.3007710 | 0.0244298 | 0.0190231 | 0.0541176 | 0.1354022 | 0.1027793 | 0.0013662 | 0.0521347 | 0.3049723 | 0.1612313 | 0.2140448 | 0.0875979 | 0.2062015 | 0.0006657 | 0.0195654 | 0.0034210 | 0.0896312 | 0.0676904 | 0.3459785 | 0.0596110 | 0.0071078 | 0.1060516 | 0.0247890 | 0.0498140 | 0.0573276 | 0.0627742 | 0.1763421 | 0.0151587 | 0.0992023 | 0.2075634 | 0.0092668 | 0.0024749 | 0.0908292 | 0.0034130 | 0.0096694 | 0.0851753 | 0.0208146 | 0.2071238 | 0.0231645 | 0.0032822 | 0.0006856 | 0.0008489 | 0.0002554 | 0.0006178 | 0.0037641 | 0.0874743 | 0.0213023 | 0.0303004 | 0.0560511 | 0.0066217 | 0.0010099 | 0.0583976 | 0.0051901 | 0.0005496 | 0.0134009 | 0.0022981 | 0.0466030 | 0.0241285 | 0.0636444 | 0.0266754 | 0.0555331 | 0.0322514 | 0.0037240 | 0.1087341 | 0.0033155 | 0.0144053 | 0.0351500 | 0.0230706 | 0.1349499 | 0.0232548 | 0.0000171 | 0.0012495 | 0.0225385 | 0.0487633 | 0.0329394 | 0.0104405 | 0.0000561 | 0.0597059 | 0.0241697 | 0.0737168 | 0.0018283 | 0.0222574 | 0.0007056 | 0.0111165 | 0.0012408 | 0.0163556 | 0.0005637 | 0.0001130 | 0.0008663 |
| GBP5 | 0.2775365 | 0.2180647 | 0.1971194 | 0.0086061 | 0.0183571 | 0.0078968 | 0.0053304 | 0.0279666 | 0.0727507 | 0.0038873 | 0.0049237 | 0.0522437 | 0.0013829 | 0.0003160 | 0.0000344 | 0.0333063 | 0.0164076 | 0.0083372 | 0.0022120 | 0.0181464 | 0.0186165 | 0.0805872 | 0.0097645 | 0.0662536 | 0.0071108 | 0.0014847 | 0.0011171 | 0.0274428 | 0.0232320 | 0.0657584 | 0.0634762 | 0.0193610 | 0.0094466 | 0.0309201 | 0.0724822 | 0.0000920 | 0.0808818 | 0.0093290 | 0.0008618 | 0.0260111 | 0.0137608 | 0.0245504 | 0.0085836 | 0.0415994 | 0.0179343 | 0.0045942 | 0.0109688 | 0.0088312 | 0.0010925 | 0.0314393 | 0.0011057 | 0.0542982 | 0.0579596 | 0.0756799 | 0.0092425 | 0.0511590 | 0.0000682 | 0.0555034 | 0.0236939 | 0.0009586 | 0.0094485 | 0.0001229 | 0.0169044 | 0.0016760 | 0.0020045 | 0.0009688 | 0.0014923 | 0.0013279 | 0.0190562 | 0.0053548 | 0.0080992 | 0.0026682 | 0.0068683 | 0.0016553 | 0.0042436 | 0.0114684 | 0.0087335 | 0.0040431 | 0.0044237 | 0.0032285 | 0.0028552 | 0.0031691 | 0.0018202 | 0.0000005 | 0.0020222 | 0.0005591 | 0.0027866 | 0.0000328 | 0.0019157 | 0.0003959 | 0.0006496 | 0.0003764 | 0.0008036 | 0.0035706 | 0.0004326 | 0.0014613 | 0.0217768 |
| IDO1 | 0.2695485 | 0.3902585 | 0.3248260 | 0.0000503 | 0.0519635 | 0.1134049 | 0.0029245 | 0.1120883 | 0.1451199 | 0.0130168 | 0.0051264 | 0.0583079 | 0.0002519 | 0.0827080 | 0.1974039 | 0.0133183 | 0.1286804 | 0.0046578 | 0.2177188 | 0.2242944 | 0.0227812 | 0.0020528 | 0.1392108 | 0.0628725 | 0.0136930 | 0.0720865 | 0.0187798 | 0.0345214 | 0.0001039 | 0.0100099 | 0.0079035 | 0.0055624 | 0.0013207 | 0.0219797 | 0.0235757 | 0.3588397 | 0.0533840 | 0.0765474 | 0.0590732 | 0.1075406 | 0.0999469 | 0.0085405 | 0.1487057 | 0.0660700 | 0.0000173 | 0.0021043 | 0.2139270 | 0.0396101 | 0.3171282 | 0.0025444 | 0.0486026 | 0.0092592 | 0.0009588 | 0.0905524 | 0.0000463 | 0.0283794 | 0.0035875 | 0.0007088 | 0.1144612 | 0.0115228 | 0.0750467 | 0.0412158 | 0.0003541 | 0.0217175 | 0.0016044 | 0.0100588 | 0.3301425 | 0.0028503 | 0.0020741 | 0.0060669 | 0.0281377 | 0.0035831 | 0.0793929 | 0.0050499 | 0.0124676 | 0.1588564 | 0.0214707 | 0.0216549 | 0.0480618 | 0.0003953 | 0.0001289 | 0.0000622 | 0.0857592 | 0.1182902 | 0.0056783 | 0.0233494 | 0.1111740 | 0.0261812 | 0.4302242 | 0.0971284 | 0.0191912 | 0.0041599 | 0.0525118 | 0.0103630 | 0.0006946 | 0.0497602 | 0.0005033 |
| IFI44L | 0.2559941 | 0.0967018 | 0.4042610 | 0.0589702 | 0.0129810 | 0.0337118 | 0.0405968 | 0.1877308 | 0.2295361 | 0.0109766 | 0.0037365 | 0.0305678 | 0.1265156 | 0.0877680 | 0.0001247 | 0.0229197 | 0.0788515 | 0.0973684 | 0.1814140 | 0.0630997 | 0.0746943 | 0.0132093 | 0.0005669 | 0.0030227 | 0.0385829 | 0.0697889 | 0.1785369 | 0.0849351 | 0.0743779 | 0.0034113 | 0.0234150 | 0.0566579 | 0.0104426 | 0.0518468 | 0.0559481 | 0.0033853 | 0.0070485 | 0.2408779 | 0.0142019 | 0.0109448 | 0.0550837 | 0.0000410 | 0.0045375 | 0.2019914 | 0.0003672 | 0.0917498 | 0.0227366 | 0.0228853 | 0.0080531 | 0.0020156 | 0.0008918 | 0.0005016 | 0.0062675 | 0.0450977 | 0.0529248 | 0.0159847 | 0.0241092 | 0.0001956 | 0.0291446 | 0.0106654 | 0.0127238 | 0.0005716 | 0.0040855 | 0.0120674 | 0.0085962 | 0.0000152 | 0.0261820 | 0.0015791 | 0.0224153 | 0.0000002 | 0.0025845 | 0.0304191 | 0.0050129 | 0.0102226 | 0.0628257 | 0.0202575 | 0.1363499 | 0.0000662 | 0.0113016 | 0.0076377 | 0.0334686 | 0.0000350 | 0.0011456 | 0.0262246 | 0.0000334 | 0.0702977 | 0.0071828 | 0.0351828 | 0.0078437 | 0.0024033 | 0.0002701 | 0.0025880 | 0.0036008 | 0.0000067 | 0.0788306 | 0.0472378 | 0.0017065 |
Differential Gene Expression Analysis using DESeq2. Comparing parameters; dose, time point, prior infection status.
# Run the DESeq2 analysis
dds1 <- DESeq(dds1)
dds2 <- DESeq(dds2)
dds3 <- DESeq(dds3)
ddsTC <- DESeq(dds4, test = "LRT", reduced = ~ dose + group)
# Contrast
resultsNames(dds1)
resultsNames(dds2)
resultsNames(dds3)
resultsNames(ddsTC)
# Create function for makings comparisons with default being conditions and dds1
compare_DEGs <- function(variable1, variable2, condition = "conditions", dds = dds1) {
as.data.frame(results(dds, contrast = c(condition, variable1, variable2)))
}
# Baselines DEGs
base_pos_neg_d1 <- compare_DEGs("pos_0_dose1", "neg_0_dose1")
base_pos_neg_d2 <- compare_DEGs("pos_0_dose2", "neg_0_dose2")
base_pos_neg_d3 <- compare_DEGs("pos_0_dose3", "neg_0_dose3")
base_pos_d1_d2 <- compare_DEGs("pos_0_dose2", "pos_0_dose1")
base_pos_d1_d3 <- compare_DEGs("pos_0_dose3", "pos_0_dose1")
base_pos_d2_d3 <- compare_DEGs("pos_0_dose2", "pos_0_dose3")
base_neg_d1_d2 <- compare_DEGs("neg_0_dose2", "neg_0_dose1")
base_neg_d1_d3 <- compare_DEGs("neg_0_dose3", "neg_0_dose1")
base_neg_d2_d3 <- compare_DEGs("neg_0_dose2", "neg_0_dose3")
# 0-24h and 0-48 (3rd dose) DEGs
neg_d1 <- compare_DEGs("neg_24_dose1", "neg_0_dose1")
neg_d2 <- compare_DEGs("neg_24_dose2", "neg_0_dose2")
neg_d3_24 <- compare_DEGs("neg_24_dose3", "neg_0_dose3")
neg_d3_48 <- compare_DEGs("neg_48_dose3", "neg_0_dose3")
pos_d1 <- compare_DEGs("pos_24_dose1", "pos_0_dose1")
pos_d2 <- compare_DEGs("pos_24_dose2", "pos_0_dose2")
pos_d3_24 <- compare_DEGs("pos_24_dose3", "pos_0_dose3")
pos_d3_48 <- compare_DEGs("pos_48_dose3", "pos_0_dose3")
# Results dds2
pos <- compare_DEGs("pos_24", "pos_0", condition = "time", dds = dds2)
neg <- compare_DEGs("neg_24", "neg_0", condition = "time", dds = dds2)
# Results dds3
d1 <- compare_DEGs("24_dose1", "0_dose1", condition = "dose", dds = dds3)
d2 <- compare_DEGs("24_dose2", "0_dose2", condition = "dose", dds = dds3)
d3_24 <- compare_DEGs("24_dose3", "0_dose3", condition = "dose", dds = dds3)
d3_48 <- compare_DEGs("48_dose3", "0_dose3", condition = "dose", dds = dds3)
# Results dds4 intercept
resTC <- results(ddsTC)
Volcano plot, filtering for significant values with False Discovery Rate < 0.05 and log fold change greater than 1.
results_list <- list(
base_pos_neg_d1,
base_pos_neg_d2,
base_pos_neg_d3,
base_pos_d1_d2,
base_pos_d1_d3,
base_pos_d2_d3,
base_neg_d1_d2,
base_neg_d1_d3,
base_neg_d2_d3,
neg_d1,
neg_d2,
neg_d3_24,
neg_d3_48,
pos_d1,
pos_d2,
pos_d3_24,
pos_d3_48,
pos,
neg,
d1,
d2,
d3_24,
d3_48
)
names(results_list) <- c(
"Baseline Dose 1 Conv vs Naïve",
"Baseline Dose 2 Conv vs Naïve",
"Baseline Dose 3 Conv vs Naïve",
"Baseline Conv Dose 1 and 2",
"Baseline Conv Dose 1 and 3",
"Baseline Conv Dose 2 and 3",
"Baseline Naïve Dose 1 and 2",
"Baseline Naïve Dose 1 and 3",
"Baseline Naïve Dose 2 and 3",
"Naïve Dose 1 0-24 hours",
"Naïve Dose 2 0-24 hours",
"Naïve Dose 3 0-24 hours",
"Naïve Dose 3 0-48 hours",
"Conv Dose 1 0-24 hours",
"Conv Dose 2 0-24 hours",
"Conv Dose 3 0-24 hours",
"Conv Dose 3 0-48 hours",
"Conv 0-24 hours",
"Naïve 0-24 hours",
"Dose 1 0-24 hours",
"Dose 2 0-24 hours",
"Dose 3 0-24 hours",
"Dose 3 0-48 hours"
)
results_list_plot <- lapply(results_list, function(x) {
x <- x %>% mutate(
test_padj = case_when(
padj < 0.05 & log2FoldChange >= 1 ~ "Up regulated",
padj < 0.05 & log2FoldChange <= -1 ~ "Down regulated",
TRUE ~ "Not significant"
),
test_pvalue = case_when(
pvalue < 0.05 & log2FoldChange >= 1 ~ "Up regulated",
pvalue < 0.05 & log2FoldChange <= -1 ~ "Down regulated",
TRUE ~ "Not significant"
),
)
})
# function to plot volcano plots
volcano_plot <- function(x, pvalue_selection = c("padj", "pvalue")) {
if (pvalue_selection == "padj") {
col_name_pval <- "test_padj"
} else if (pvalue_selection == "pvalue") {
col_name_pval <- "test_pvalue"
} else {
stop("Please change the pvalue_selection argument value. It should either `padj` or `pvalue`.")
}
subsetted <- subset(results_list_plot[[x]] %>% tibble::rownames_to_column("gene_symbol"), abs(log2FoldChange) >= 1 & get(pvalue_selection) < 0.05)
label_list <- c("RSAD2", "IFIT1", "CXCL10", "CMPK2", "GBP1P1", "IFI44L", "BATF2", "SIGLEC1", "IFI44", "NDUFS4", "MX1", "CXCL9", "FCGR1A", "FCGR1B", "CCL4")
label <- subsetted %>% filter(gene_symbol %in% label_list)
# count up regulated DEGs
deg_number_up <- subset(
results_list_plot[[x]] %>%
tibble::rownames_to_column("gene_symbol"),
log2FoldChange >= 1 & get(pvalue_selection) < 0.05
) %>%
nrow()
# count down regulated DEGs
deg_number_down <- subset(
results_list_plot[[x]] %>%
tibble::rownames_to_column("gene_symbol"),
log2FoldChange <= -1 & get(pvalue_selection) < 0.05
) %>%
nrow()
results_list_plot[[x]] %>%
ggplot(aes(x = log2FoldChange, y = -log10(get(pvalue_selection)))) +
geom_point(aes(color = get(col_name_pval)), size = 3, alpha = 0.3) +
scale_color_manual(values = c("Down regulated" = "blue", "Not significant" = "grey80", "Up regulated" = "red")) +
xlab(expression("Fold Change (Log"[2] * ")")) +
ylab(expression(paste0("-Log"[10] * "(", pvalue_selection, ")"))) +
labs(x = NULL, y = NULL) +
geom_vline(xintercept = c(-1), linetype = "dotted", size = 1) +
geom_vline(xintercept = c(1), linetype = "dotted", size = 1) +
geom_hline(yintercept = -log10(0.05), linetype = "dotted", size = 1) +
geom_label_repel(data = label, aes(log2FoldChange, -log10(get(pvalue_selection)), label = gene_symbol)) +
xlim(-2, 6) +
ylim(0, 20) +
ggtitle(x) +
annotate(geom = "text", colour = "red", size = 10, x = 4, y = 20, hjust = 0, label = paste0(deg_number_up)) +
annotate(geom = "text", colour = "blue", size = 10, x = -2, y = 20, hjust = 0, label = paste0(deg_number_down)) +
ggprism::theme_prism() +
theme(axis.ticks = element_line(size = .5), legend.position = "none")
}
volcano_plots <- lapply(names(results_list_plot), volcano_plot, pvalue_selection = "padj")
names(volcano_plots) <- names(results_list_plot)
volcano_plots
GSEA looks for patterns of enriched genes which are related to each other. Compared with over-representation analysis this does not rely solely on DEGs. Therefore, this method is advantageous when the differences in gene expression is smaller.
Database used for enrichment analysis, “Reactome”. Also testing the Blood Transcriptome modules presented in Li et al. 2014.
gmt <- read.gmt("../data/blood_transcription_module/BTM_for_GSEA_20131008.gmt")
gmt.reactome <- read.gmt("../data/metabolic_transcriptionmodule/MSigDB_reactome.gmt")
process_to_gsea <- function(x) {
original_gene_list <- x$log2FoldChange
names(original_gene_list) <- rownames(x)
gene_list <- na.omit(original_gene_list)
gene_list <- sort(gene_list, decreasing = TRUE)
}
ls_processed <- lapply(results_list, process_to_gsea)
list_gse <- lapply(ls_processed, function(x) {
GSEA(
geneList = x,
TERM2GENE = gmt,
minGSSize = 3,
maxGSSize = 800,
nPermSimple = 10000,
pvalueCutoff = 0.05,
verbose = TRUE,
pAdjustMethod = "fdr"
)
})
list_gse_reactome <- lapply(ls_processed, function(x) {
GSEA(
geneList = x,
TERM2GENE = gmt.reactome,
minGSSize = 3,
maxGSSize = 800,
nPermSimple = 10000,
pvalueCutoff = 0.05,
verbose = TRUE,
pAdjustMethod = "fdr"
)
})
Gene set enrichment analysis based on fold change ranking using the blood transcriptome modules presented in Li et al. 2014. Cut-off; FDR-adjusted p-value < 0.05. N = 15
my_palette <- colorRampPalette(c("#265891", "white", "#B80F20"))(n = 100)
sel_list_gse <- list_gse[c(
"Naïve Dose 1 0-24 hours",
"Naïve Dose 2 0-24 hours",
"Naïve Dose 3 0-24 hours",
"Naïve Dose 3 0-48 hours",
"Conv Dose 1 0-24 hours",
"Conv Dose 2 0-24 hours",
"Conv Dose 3 0-24 hours",
"Conv Dose 3 0-48 hours"
)]
combined_gsea <- data.table::rbindlist(lapply(sel_list_gse, as.data.frame), idcol = TRUE) %>%
group_by(.id) %>%
arrange(desc(NES)) %>%
ungroup()
to_heatmap <- combined_gsea %>%
select(.id, ID, NES) %>%
filter(NES > 2 | NES < -2, !grepl("TBA", ID)) %>%
tidyr::pivot_wider(values_from = NES, names_from = ID) %>%
mutate(across(where(anyNA), ~ tidyr::replace_na(., 0))) %>%
t()
colnames(to_heatmap) <- to_heatmap[1, ]
to_heatmap <- to_heatmap[-1, ]
col_order <- c(
"Conv Dose 1 0-24 hours",
"Naïve Dose 1 0-24 hours",
"Conv Dose 2 0-24 hours",
"Naïve Dose 2 0-24 hours",
"Conv Dose 3 0-24 hours",
"Naïve Dose 3 0-24 hours",
"Conv Dose 3 0-48 hours",
"Naïve Dose 3 0-48 hours"
)
to_heatmap <- to_heatmap[, col_order]
BTM_heatmap <- matrix(as.numeric(to_heatmap), ncol = ncol(to_heatmap)) %>%
pheatmap::pheatmap(
width = 6,
height = 20,
cellwidth = 20,
cellheight = 5,
cluster_rows = TRUE,
cluster_cols = FALSE,
clustering_method = "complete",
angle_col = 45,
border_color = "black",
cutree_cols = 4,
color = my_palette,
labels_col = colnames(to_heatmap),
labels_row = rownames(to_heatmap),
treeheight_col = 1,
fontsize_row = 6,
fontsize_col = 8,
gaps_col = c(2, 4, 6)
)
Pearson’s correlation of fold changes in individual genes identified as differentially regulated in any group. N = 15. Comparison between dose 2 and dose 3.
extract_gene_names <- function(x) {
subsetted <- results_list_plot[[x]] %>%
tibble::rownames_to_column("gene_symbol") %>%
select(gene_symbol, log2FoldChange, padj)
return(subsetted)
}
gene_names <- lapply(names(results_list_plot), extract_gene_names)
names(gene_names) <- names(results_list_plot)
gene_names_filt <- gene_names[c(
"Naïve Dose 1 0-24 hours", "Naïve Dose 2 0-24 hours",
"Naïve Dose 3 0-24 hours", "Naïve Dose 3 0-48 hours",
"Conv Dose 1 0-24 hours", "Conv Dose 2 0-24 hours",
"Conv Dose 3 0-24 hours", "Conv Dose 3 0-48 hours"
)]
gene_names_df_filt <- rbindlist(gene_names_filt, fill = TRUE, idcol = "group_timepoint") %>%
mutate(
timepoint = case_when(
grepl("Dose 1", group_timepoint) ~ "Dose 1",
grepl("Dose 2", group_timepoint) ~ "Dose 2",
grepl("Dose 3 0-24", group_timepoint) ~ "Dose 3",
grepl("Dose 3 0-48", group_timepoint) ~ "Dose 3 48h"
),
group = case_when(
grepl("Conv", group_timepoint) ~ "Conv",
grepl("Naïve", group_timepoint) ~ "Naïve"
)
)
gene_names_sign_df <- gene_names_df_filt %>%
select(-group_timepoint) %>%
pivot_wider(names_from = group, values_from = c(padj, log2FoldChange), names_glue = "{.value}_{group}") %>%
mutate(
# Categorize genes into 'degs_presence' based on significance and regulation
degs_presence = case_when(
padj_Naïve < 0.05 & padj_Conv < 0.05 & (log2FoldChange_Conv * log2FoldChange_Naïve) > 0 ~ "Both significant",
padj_Naïve < 0.05 ~ "Naïve only",
padj_Conv < 0.05 ~ "Conv only",
TRUE ~ "non_significant"
),
regulation = case_when(
log2FoldChange_Conv > 0 & log2FoldChange_Naïve > 0 ~ "Upregulated",
log2FoldChange_Conv < 0 & log2FoldChange_Naïve < 0 ~ "Downregulated",
log2FoldChange_Conv > 0 & log2FoldChange_Naïve < 0 ~ "Upregulated Conv",
log2FoldChange_Conv < 0 & log2FoldChange_Naïve > 0 ~ "Upregulated Naïve"
)
)
filtered_gene_doses <- gene_names_sign_df %>%
filter(!timepoint %in% c("Dose 1", "Dose 3 48h"))
g1 <- filtered_gene_doses %>%
filter(degs_presence != "non_significant") %>%
ggplot(aes(log2FoldChange_Naïve, log2FoldChange_Conv, color = degs_presence)) +
geom_point(data = subset(filtered_gene_doses, degs_presence == "non_significant"), alpha = .3, color = "grey80") +
geom_point(data = subset(filtered_gene_doses, degs_presence == "Naïve only"), alpha = .3) +
geom_point(data = subset(filtered_gene_doses, degs_presence == "Conv only"), alpha = .3) +
geom_point(data = subset(filtered_gene_doses, degs_presence == "Both significant"), alpha = .3) +
scale_color_viridis_d() +
scale_y_continuous(limits = c(-6, 6)) +
scale_x_continuous(limits = c(-6, 6)) +
geom_smooth(data = subset(filtered_gene_doses, degs_presence != "non_significant", se = TRUE), method = "lm") +
scale_color_npg() +
stat_cor(method = "pearson", cor.coef.name = "r", p.accuracy = 0.001) +
facet_wrap(~timepoint) +
theme_prism(base_line_size = .5, base_fontface = "plain") +
theme(
axis.ticks = element_line(size = .5), legend.position = "top",
aspect.ratio = 1
)
g1
Fold changes in individual genes identified as significantly differentially regulated in any group, Dose 1 and Dose 3.
filtered_gene_doses <- gene_names_sign_df %>%
filter(timepoint %in% c("Dose 1", "Dose 3 48h"))
g2 <- filtered_gene_doses %>%
filter(degs_presence != "non_significant") %>%
ggplot(aes(log2FoldChange_Naïve, log2FoldChange_Conv, color = degs_presence)) +
geom_point(data = subset(filtered_gene_doses, degs_presence == "non_significant"), alpha = .3, color = "grey80") +
geom_point(data = subset(filtered_gene_doses, degs_presence == "Naïve only"), alpha = .3) +
geom_point(data = subset(filtered_gene_doses, degs_presence == "Conv only"), alpha = .3) +
geom_point(data = subset(filtered_gene_doses, degs_presence == "Both significant"), alpha = .3) +
scale_color_viridis_d() +
scale_y_continuous(limits = c(-6, 6)) +
scale_x_continuous(limits = c(-6, 6)) +
geom_smooth(data = subset(filtered_gene_doses, degs_presence != "non_significant", se = TRUE), method = "lm") +
scale_color_npg() +
stat_cor(method = "pearson", cor.coef.name = "r", p.accuracy = 0.001) +
facet_wrap(~timepoint) +
theme_prism(base_line_size = .5, base_fontface = "plain") +
theme(
axis.ticks = element_line(size = .5), legend.position = "top",
aspect.ratio = 1
)
g2
The Jaccard index is a measure of overlap between two sets of genes, in this case the DEGs between the convalascent and naive groups. Calculated using the GeneOverlap package.
degs <- lapply(gene_names_filt, function(x) {
x %>%
filter(padj < 0.05, log2FoldChange > 1) %>%
pull(gene_symbol)
})
degs <- degs[-c(1, 4, 8)]
gom_obj <- newGOM(degs, degs)
filtered_degs_matrix <- getMatrix(gom_obj, "Jaccard")
my_palette <- colorRampPalette(c("#265891", "#F6FF92", "#B80F20"))(n = 50)
# order matrix based on cluster
od <- hclust(dist(filtered_degs_matrix), method = "ward.D2")$order
filtered_degs_matrix_ordered <- filtered_degs_matrix[od, od]
# Define color values for different row annotations
row_col_group <- c("Naïve" = "#A9A9A9", "Conv" = "#A9CAF6")
row_col_time <- c(
"Dose 1 0-24 hours" = "#5e4fa2",
"Dose 2 0-24 hours" = "#fdae61",
"Dose 3 0-24 hours" = "#9e0142",
"Dose 3 0-48 hours" = "#abdda4"
)
# Create row annotations for heatmap
row_anno <- rowAnnotation(
timepoint = gsub(".*? (.*)$", "\\1", rownames(filtered_degs_matrix_ordered)),
group = gsub("^(.*?) .*", "\\1", rownames(filtered_degs_matrix_ordered)),
col = list(timepoint = row_col_time, group = row_col_group),
annotation_legend_param = list(
timepoint = list(title = "Timepoint"),
group = list(title = "Group")
)
)
col_anno <- columnAnnotation(
timepoint = gsub(".*? (.*)$", "\\1", rownames(filtered_degs_matrix_ordered)),
group = gsub("^(.*?) .*", "\\1", rownames(filtered_degs_matrix_ordered)),
col = list(timepoint = row_col_time, group = row_col_group),
show_legend = FALSE
)
heatmap1 <- Heatmap(filtered_degs_matrix_ordered,
border = FALSE,
rect_gp = gpar(type = "none"),
show_row_names = FALSE, show_column_names = FALSE,
name = "Jaccard similarity index",
bottom_annotation = col_anno,
right_annotation = row_anno,
col = my_palette,
clustering_method_row = "ward.D2",
clustering_method_columns = "ward.D2",
show_column_dend = FALSE,
cell_fun = function(j, i, x, y, w, h, fill) {
if (as.numeric(x) <= 1.1 - as.numeric(y)) {
grid.rect(x, y, w, h, gp = gpar(fill = fill, col = fill))
}
}
)
heatmap1
Collection of significant differentially expressed genes of immune-associated groups of receptor ligands and receptors annotated in the HUGO database (total of 157). The bubble size represents the mean of absolute fold change when significant for both groups but the value itself when significant for only one of the groups.
4 gene sets were used based on the groups of Chemokine (45), Interferon (33), Interleukins (43), and TNF (18) ligands according to the HUGO gene nomenclature Committee.
Cut-off; FDR-adjusted p-value < 0.05. N = 15.
ls_gene_file <- list.files("../data/gene_sets", pattern = "231102|231128", full.names = TRUE)
ls_gene_sets <- lapply(ls_gene_file, read.csv, header = TRUE, sep = ",")
names(ls_gene_sets) <- sub("^(?:[^_]+_){2}([^_]+)_.*$", "\\1", ls_gene_file)
gene_sets <- rbindlist(ls_gene_sets, idcol = "gene_set", use.names = TRUE)
cytokine_heatmap <- gene_names_sign_df %>%
pivot_longer(
cols = starts_with("padj_") | starts_with("log2FoldChange_"),
names_to = c(".value", "group"),
names_pattern = "([\\p{L}]+)_(\\w+)"
) %>%
filter(gene_symbol %in% gene_sets$Approved_symbol) %>%
mutate(
group = ifelse(group == "Na", "Naïve", group),
group_timepoint = paste(timepoint, group)
) %>%
group_by(gene_symbol, timepoint) %>%
mutate(FoldChange_mean = case_when(
degs_presence == "Both significant" ~ mean(FoldChange),
degs_presence == "non_significant" ~ min(FoldChange),
degs_presence %in% c("Conv only", "Naïve only") ~ max(FoldChange)
)) %>%
ungroup()
g2 <- cytokine_heatmap %>%
group_by(gene_symbol) %>%
filter(
!all(degs_presence == "non_significant"),
regulation %in% c("Upregulated", "Downregulated")
) %>%
ungroup() %>%
ggplot(aes(x = timepoint, y = gene_symbol, size = abs(FoldChange_mean), color = degs_presence)) +
geom_point() +
scale_color_manual(values = c("#D95F02", "#A9CAF6", "#A9A9A9", "grey90")) +
labs(x = "", y = "", size = "FoldChange\n(Absolute log2)", color = "Significance") +
theme_bw() +
facet_wrap(~regulation, scales = "free_y") +
theme_prism(base_line_size = .5, base_fontface = "plain", axis_text_angle = 45, base_size = 12) +
theme(
axis.ticks = element_line(size = .5), legend.position = "right",
aspect.ratio = 1,
legend.title = element_text()
)
g2
# interesting genes related to complement
plot_counts <- function(gene_interest, dds = dds1) {
fiss <- plotCounts(dds, gene_interest,
intgroup = c("dose", "group"), returnData = TRUE, transform = FALSE, normalized = TRUE, replaced = FALSE
)
g <- ggplot(
fiss,
aes(x = dose, y = count, color = group, group = group)
) +
geom_point() +
stat_summary(fun.y = mean, geom = "line") +
scale_y_log10() +
labs(x = "", y = "Normalized counts\n(median of ratios)") +
scale_color_manual(values = c("#A9A9A9", "#A9CAF6")) +
theme_prism(base_line_size = .5, base_fontface = "plain", axis_text_angle = 45) +
theme(
axis.ticks = element_line(size = .5), legend.position = "right",
aspect.ratio = 1,
legend.title = element_text()
)
g
ggsave(g,
filename = paste0("../", result.dir, gene_interest, "_cytokine_degs.pdf"),
height = 5, width = 5
)
}
plot_counts(gene_interest = "CXCL10")
plot_counts(gene_interest = "IL16")
plot_counts(gene_interest = "CD40LG")
plot_counts(gene_interest = "CCL5")
plot_counts(gene_interest = "TNF")
plot_counts(gene_interest = "TNFSF13B")
plot_counts(gene_interest = "IL1B")
plot_counts(gene_interest = "IL15")
Databases used; KEGG Pathway Database (figure 3A) and the MSigDB subset for Transcriptional Factors Targets (figure 3B).
DEGs with log2(fold change) greater than 1. Cut-off for DEGs was FDR-adjusted p-value < 0.05. N = 15.
msig_geneset <- msigdbr(species = "Homo sapiens", category = "C3")
msig_geneset <- msig_geneset %>% filter(gs_subcat %in% c("TFT:GTRD", "TFT:TFT_Legacy"))
tft_geneset <- msig_geneset %>%
dplyr::select(gs_name, gene_symbol) %>%
dplyr::rename(term = gs_name, gene = gene_symbol)
annot <- ensembldb::select(EnsDb.Hsapiens.v86,
keys = keys(EnsDb.Hsapiens.v86),
columns = c("ENTREZID", "SYMBOL")
)
degs <- lapply(degs, function(x) plyr::mapvalues(x, annot$GENEID, annot$SYMBOL, warn_missing = FALSE))
## map names with TFs
ora_tf <- lapply(degs, function(x) {
enricher(x,
pvalueCutoff = 1,
pAdjustMethod = "fdr",
qvalueCutoff = 1,
TERM2GENE = tft_geneset
)
})
ora_tf_df <- lapply(ora_tf, data.frame)
ora_tf_df_bind <- rbindlist(ora_tf_df, use.names = TRUE, idcol = "group_timepoint", fill = TRUE) %>% as.data.frame()
ora_tf_df_bind <- ora_tf_df_bind %>%
mutate(
timepoint = case_when(
grepl("Dose 1", group_timepoint) ~ "Dose 1",
grepl("Dose 2", group_timepoint) ~ "Dose 2",
grepl("Dose 3 0-24", group_timepoint) ~ "Dose 3",
grepl("Dose 3 0-48", group_timepoint) ~ "Dose 3 48h"
),
group = case_when(
grepl("Conv", group_timepoint) ~ "Conv",
grepl("Naïve", group_timepoint) ~ "Naïve"
),
timepoint_group = paste(timepoint, group, sep = "_")
)
# Create the ggplot with the reordered x-axis
ora_tf_df_bind <- ora_tf_df_bind %>%
filter(p.adjust < 0.05 & Count > 1) %>%
mutate(
ID_cut = case_when(
grepl("TARGET", ID) ~ sub("_.*$", "", ID),
str_count(ID, "_") > 1 ~ sub("^[^_]*_(.*?)_.*$", "\\1", ID),
TRUE ~ sub("_.*$", "", ID),
),
ID_cut = ifelse(ID_cut == "ICSBP", "IRF8/ICSBP", ID_cut)
) %>%
filter(!(ID_cut %in% c("YNGTTNNNATT", "NFKAPPAB65", "NFKB"))) %>%
# Remove duplicated ID by selecting the gene set with lowest p_value
arrange(ID, p.adjust) %>%
distinct(ID, timepoint_group, .keep_all = TRUE)
# create empty data.frame with Naive Dose 1 for aesthetics
data_empty <- data.frame(
timepoint_group = "Dose 1_Naïve",
ID_cut = unique(ora_tf_df_bind$ID_cut),
p.adjust = 1
)
ora_tf_df_bind <- plyr::rbind.fill(ora_tf_df_bind, data_empty)
g3 <- ora_tf_df_bind %>%
ggplot(aes(x = timepoint_group, y = ID_cut, fill = -log10(p.adjust))) +
geom_tile() +
theme_bw() +
scale_fill_gradient2(low = "#265891", mid = "white", high = "#B80F20") +
labs(x = "", y = "", fill = "Adjusted p-value\n(-log10)") +
theme_prism(base_line_size = .5, base_fontface = "plain", axis_text_angle = 45) +
theme(
axis.ticks = element_line(size = .5), legend.position = "right",
aspect.ratio = 1,
legend.title = element_text()
)
g3
degs[["Naïve Dose 1 0-24 hours"]] <- "no_degs"
degs_entrez_id <- lapply(degs, function(x) plyr::mapvalues(x, from = annot$SYMBOL, to = annot$ENTREZID, warn_missing = FALSE))
compared_cluster <- compareCluster(degs_entrez_id, fun = "enrichKEGG", organism = "hsa")
cluster_termsim <- pairwise_termsim(compared_cluster, method = "JC")
# create empty data.frame with Naive Dose 1 for aesthetics
data_empty <- data.frame(
Cluster = "Naïve Dose 1 0-24 hours",
ID = cluster_termsim@compareClusterResult$ID,
Description = cluster_termsim@compareClusterResult$Description,
log10_padj = 0,
pvalue = 1,
p.adjust = 1,
qvalue = 1,
geneID = 0,
Count = 1, GeneRatio = 0, BgRatio = 0
) %>% distinct()
cluster_termsim@compareClusterResult <- cluster_termsim@compareClusterResult %>%
mutate(log10_padj = -log10(p.adjust))
cluster_termsim@compareClusterResult <- plyr::rbind.fill(cluster_termsim@compareClusterResult, data_empty)
# Set factor levels
cluster_termsim@compareClusterResult$Cluster <- factor(cluster_termsim@compareClusterResult$Cluster, levels = c(
"Conv Dose 1 0-24 hours", "Naïve Dose 1 0-24 hours",
"Conv Dose 2 0-24 hours", "Naïve Dose 2 0-24 hours",
"Conv Dose 3 0-24 hours", "Naïve Dose 3 0-24 hours"
))
treeplot(cluster_termsim,
showCategory = 31,
label_format = 30,
offset.params = list(
tiplab = 9,
bar_tree = 23
),
cluster.params = list(
n = 6,
label_words_n = 0
),
clusterPanel.params = list(
colnames_angle = -90
),
color = "log10_padj"
) +
labs(x = "", fill = "Adjusted p-value\n(-log10)") +
scale_fill_gradient2(low = "#265891", mid = "white", high = "#B80F20") +
theme(
axis.ticks = element_line(size = .5), legend.position = "right",
aspect.ratio = 1,
legend.title = element_text()
)
tpm_gene <- data.table::fread("../data/processed_data_nfcore/batch_1-2/salmon/salmon.merged.gene_tpm.tsv", header = TRUE, sep = "\t") %>%
as.data.frame()
colnames(tpm_gene) <- plyr::mapvalues(sub("_L001|_L002", "", colnames(tpm_gene)), from = sampleTable$sample_ID, to = paste(sampleTable$subject_ID, sampleTable$dose, sampleTable$group, sep = "_"))
names_col <- colnames(tpm_gene)[3:101]
vaccine_tpm <- tpm_gene[64622:64625, ]
vaccine_plot <- tidyr::pivot_longer(vaccine_tpm, cols = names_col) %>%
tidyr::separate(name, into = c("subject_ID", "time", "dose", "group"), sep = "_", remove = TRUE) %>%
mutate(
time_dose = paste(time, dose, sep = "_"),
time_dose = factor(time_dose, levels = c(
"0_dose1", "24_dose1",
"0_dose2", "24_dose2",
"0_dose3", "24_dose3", "48_dose3"
)),
group = case_when(
group == "pos" ~ "Conv",
group == "neg" ~ "Naïve"
),
group = factor(group, levels = c("Conv", "Naïve"))
) %>%
na.omit()
vaccine_plot %>%
filter(grepl("pfizer", gene_id)) %>%
ggplot(aes(x = time_dose, y = value, group = subject_ID, fill = group)) +
geom_line() +
geom_point(shape = 21, size = 5, color = "black", stroke = 1.2) +
scale_fill_manual(values = c("#A9CAF6", "#A9A9A9")) +
scale_y_log10(limits = c(0.001, 1000), breaks = c(0.01, 0.1, 1, 10, 100)) +
labs(y = "Transcripts per million (log)", x = "") +
ggprism::theme_prism() +
theme(
legend.position = "none",
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)
) +
facet_wrap(~group, ncol = 4)